package org.luosl.webmagicx.pipeline.component

import java.util
import java.util.concurrent.locks.ReentrantReadWriteLock

import us.codecraft.webmagic.ResultItems

/**
  * Single-machine de-duplication backed by an in-memory `java.util.HashSet`.
  *
  * The backing set is handed to `loadCache` exactly once, while the instance is
  * being constructed, so the caller can pre-populate it with already-known values.
  * After that, every access to the set made by this class goes through a
  * `ReentrantReadWriteLock`: `addItem` takes the write lock and `isUnique` takes
  * the read lock.
  *
  * NOTE(review): `isUnique` and `addItem` are individually guarded, but a
  * "check then add" sequence made of two separate calls is not atomic here.
  * Whether the `Distinct` base class serialises that sequence is not visible
  * from this file -- confirm before relying on strict uniqueness under concurrency.
  *
  * Created by luosl on 2017/12/14.
  *
  * @param loadCache     callback invoked once during construction with the (initially
  *                      empty) backing set; anything it adds is treated as already seen.
  *                      It receives the live set, so it should not keep or share the
  *                      reference beyond the call.
  * @param distinctValue forwarded unchanged to the `Distinct` super constructor
  *                      (presumably extracts the de-duplication key from a
  *                      `ResultItems` -- verify against `Distinct`).
  */
class HashSetDistinct(loadCache: util.HashSet[Any] => Unit,distinctValue:ResultItems => Any) extends Distinct(distinctValue){

  /** Values that have already been recorded. */
  private val set:util.HashSet[Any] = new util.HashSet[Any]

  /** Guards every read and write of `set` performed by `addItem` / `isUnique`. */
  private val lock:ReentrantReadWriteLock = new ReentrantReadWriteLock()

  /**
    * Load the cache: let the caller pre-populate the set before it is used.
    */
  loadCache(set)

  /**
    * Records `item` in the backing set while holding the write lock.
    *
    * @param item the value to record; adding a value that is already present
    *             leaves the set unchanged.
    */
  override def addItem(item: Any): Unit = {
    val writeLock = lock.writeLock()
    // Acquire before entering `try`: if lock() itself fails, the `finally` must not
    // attempt to release a lock this thread never obtained.
    writeLock.lock()
    try {
      set.add(item)
    } finally {
      writeLock.unlock()
    }
  }

  /**
    * Tells whether `item` is absent from the backing set, checked under the read lock.
    *
    * @param item the value to look up.
    * @return `true` if the set does not contain `item`, `false` otherwise.
    */
  override def isUnique(item: Any): Boolean = {
    val readLock = lock.readLock()
    // Same ordering as in addItem: lock first, then guard the release with try/finally.
    readLock.lock()
    try {
      !set.contains(item)
    } finally {
      readLock.unlock()
    }
  }
}
